suppressPackageStartupMessages({
library(epiwraps)
library(rtracklayer)
library(GenomicRanges)
library(ggplot2)
library(ComplexHeatmap)
library(chromVAR)
library(motifmatchr)
library(memes)
library(MotifDb)
library(universalmotif)
library(TFBSTools)
library(AnnotationHub)
library(RColorBrewer)
library(viridis)
library(viridisLite)
library(cowplot)
})
## See system.file("LICENSE", package="MotifDb") for use restrictions.
theme_set(theme_bw())
set.seed(123)
load ATAC data for TDG_KO
bamfiles_ctrl <- list.files("../Tdg_wt/aligned/",pattern = "^Kg_tdg_M(.*)\\.bam$", full =TRUE)
bamfiles_TDG <- list.files("../Tdg_wt/aligned/",pattern = "^Kg_tdg_ko(.*)\\.bam$", full =TRUE)
names(bamfiles_ctrl) <- c("wt_1", "wt_2")
names(bamfiles_TDG) <- c("tdg +/- 1", "tdg +/- 2","tdg +/- 3")
Ctrl_Full_bigwig <- list.files("../Tdg_wt//tracks/",pattern = "^Kg_tdg_M(.*).bw$", full =TRUE)
Tdg_Full_bigwig <- list.files("../Tdg_wt//tracks/",pattern = "^Kg_tdg_ko(.*).bw$", full =TRUE)
give names to the tracks
names(Ctrl_Full_bigwig)<- c("wt_1", "wt_2")
names(Tdg_Full_bigwig) <- c(c("tdg +/- 1", "tdg +/- 2","tdg +/- 3"))
total_bw_list <- c(Ctrl_Full_bigwig,Tdg_Full_bigwig)
load peaks
peaks
peaks_TDG_BP <- c(TDG_KO1_BP = rtracklayer::import("../Tdg_wt/peaks/peaks/Kg_tdg_ko_1_EKDL220000948-1a_H22J7DSX3_L2_peaks.broadPeak"), TDG_KO2_BP = rtracklayer::import("../Tdg_wt/peaks/peaks/Kg_tdg_ko_2_EKDL220003833-1a_HJ5T2DSX3_L1_peaks.broadPeak"),TDG_KO3_BP = rtracklayer::import("../Tdg_wt/peaks/peaks/Kg_tdg_ko_3_EKDL220003834-1a_HJ5T2DSX3_L1_peaks.broadPeak"))
peaks_Ctrl_BP <- c(Control_1 = rtracklayer::import("../Tdg_wt/peaks/peaks/Kg_tdg_M_1_EKDL220003835-1a_HJ5T2DSX3_L1_peaks.broadPeak"), Control_2 = rtracklayer::import("../Tdg_wt/peaks/peaks/Kg_tdg_M_3_EKDL220003836-1a_HJ5T2DSX3_L1_peaks.broadPeak"))
blacklist
blacklist <- import("../object_resources/mm10.blacklist_chrM.bed")
blacklist
## GRanges object with 165 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 24612621-24612850 *
## [2] chr1 48881431-48881690 *
## [3] chr1 58613871-58614090 *
## [4] chr1 78573921-78574140 *
## [5] chr1 88217961-88221950 *
## ... ... ... ...
## [161] chr9 24541941-24542200 *
## [162] chr9 35305121-35305620 *
## [163] chr9 110281191-110281400 *
## [164] chr9 123872951-123873160 *
## [165] chrM 2-16299 *
## -------
## seqinfo: 20 sequences from an unspecified genome; no seqlengths
seqlevelsStyle(blacklist) <- "ensembl"
get consensus peaks
###total
x <- c(peaks_Ctrl_BP,peaks_TDG_BP)
total_consensus_peaks <- reduce(unlist(GRangesList(x)), with.revmap=TRUE)
y <- keepStandardChromosomes(total_consensus_peaks, pruning.mode = "coarse")
total_consensus_peaks <- granges(y[lengths(y$revmap)>1])
seqlevelsStyle(blacklist) <- "ensembl"
## Warning in .replace_seqlevels_style(x_seqlevels, value): found more than one
## best sequence renaming map compatible with seqname style "ensembl" for this
## object, using the first one
total_consensus_peaks <- total_consensus_peaks[!overlapsAny(total_consensus_peaks, blacklist)]# thats how you get rid of blacklisted peaks!!!!
total_consensus_peaks
## GRanges object with 34791 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] 1 3193206-3194116 *
## [2] 1 3371834-3373144 *
## [3] 1 3670571-3672240 *
## [4] 1 4256517-4258768 *
## [5] 1 4326626-4326879 *
## ... ... ... ...
## [34787] Y 90806973-90807763 *
## [34788] Y 90810055-90813626 *
## [34789] Y 90819329-90819916 *
## [34790] Y 90825910-90828204 *
## [34791] Y 90835647-90836399 *
## -------
## seqinfo: 22 sequences from an unspecified genome; no seqlengths
total_consensus_peaks_2000 <- total_consensus_peaks[width(total_consensus_peaks)<=2000]
x <- c(peaks_TDG_BP)
TDG_consensus_peaks <- reduce(unlist(GRangesList(x)), with.revmap=TRUE)
y <- keepStandardChromosomes(TDG_consensus_peaks, pruning.mode = "coarse")
TDG_consensus_peaks <- granges(y[lengths(y$revmap)>1]) #for standard chromosomes use keep this line of code
seqlevelsStyle(blacklist) <- "ensembl"
## Warning in .replace_seqlevels_style(x_seqlevels, value): found more than one
## best sequence renaming map compatible with seqname style "ensembl" for this
## object, using the first one
TDG_consensus_peaks <- TDG_consensus_peaks[!overlapsAny(TDG_consensus_peaks, blacklist)]# thats how you get rid of blacklisted peaks!!!!
TDG_consensus_peaks
## GRanges object with 7735 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] 1 3193206-3194116 *
## [2] 1 4256517-4258752 *
## [3] 1 4559822-4560973 *
## [4] 1 4766599-4766942 *
## [5] 1 4834934-4835688 *
## ... ... ... ...
## [7731] Y 90786836-90791666 *
## [7732] Y 90798938-90800732 *
## [7733] Y 90811286-90813626 *
## [7734] Y 90819329-90819916 *
## [7735] Y 90825910-90828112 *
## -------
## seqinfo: 22 sequences from an unspecified genome; no seqlengths
import 5fC/no5fC sites
sites_5fC <- readRDS("../object_resources/5fC/sites_5fC.rds")
sites_no5fC <- readRDS("../object_resources/5fC/sites_no5fC.rds")
import TSS with and without 5fC in proximity
TSS_2KB_5fC <- readRDS("../object_resources/5fC/TSS_2KB_5fC.rds")
TSS_2KB_no5fC <- readRDS("../object_resources/5fC/TSS_2KB_no5fC.rds")
calculation of normfactor generation of normalization factors
# background normalization
bg_nf_full <- bwNormFactors(total_bw_list, method = "background")
## Warning: bwNormFactors is deprecated, please gradually switch to
## `getNormFactors`.
## Comparing coverage in random regions...
p1 <- fragSizesDist(x = c(bamfiles_ctrl, bamfiles_TDG),what = "1")+ xlim(0,600)+ scale_x_continuous(n.breaks = 22)+scale_color_brewer(palette = "Dark2")+theme(text = element_text(size = 12))+ggtitle("Fragment Size Distribution (Chr1)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
p1 <- fragSizesDist(x = c(bamfiles_ctrl, bamfiles_TDG),what = "1")+ xlim(0,600)+scale_color_brewer(palette = "Dark2")+theme(text = element_text(size = 12))+ggtitle("Fragment Size Distribution (Chr1)")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
p1+theme(legend.text = element_text(face="italic"))
## Warning: Removed 337053 rows containing non-finite values (`stat_density()`).
saveRDS(p1+theme(legend.text = element_text(face="italic")),"../object_resources/figure_2_sup_meta/p1.rds")
#p1 <- readRDS("../object_resources/figure_2_sup_meta/p1.rds")
broad total consensus peaks
m_bp_total <- signal2Matrix(total_bw_list,regions = total_consensus_peaks_2000,w = 10L)
## Reading ../Tdg_wt//tracks//Kg_tdg_M_1_EKDL220003835-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_M_3_EKDL220003836-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_1_EKDL220000948-1a_H22J7DSX3_L2.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_2_EKDL220003833-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_3_EKDL220003834-1a_HJ5T2DSX3_L1.bw
m_bp_total_bg <- rescaleSignalMatrices(m_bp_total,bg_nf_full)
#saveRDS(m_bp_total,"m_bp_total.rds")
saveRDS(m_bp_total_bg,"../object_resources/figure_2_sup_meta/m_bp_total_bg.rds")
#m_bp_total_bg <- readRDS("m_bp_total_bg.rds")
names(m_bp_total_bg) <- c("wt 1","wt 2","Tdg +|- 1","Tdg +|- 2","Tdg +|- 3")
p2 <- plotEnrichedHeatmaps(m_bp_total_bg,raster_resize_mat = TRUE , row_title="Accessible regions",scale_title = "backgr. \nnorm.\ndensity", colors = rocket(100),column_title_gp=gpar(fontface = "italic"),row_title_gp = gpar(fontsize = 11),top_annotation = FALSE, use_raster = TRUE)
p2
saveRDS(p2,"../object_resources/figure_2_sup_meta/p2.rds")
#p2 <- readRDS("p2.rds")
m_5fC <- signal2Matrix(c(total_bw_list),w = 10L, regions = sites_5fC)
## Reading ../Tdg_wt//tracks//Kg_tdg_M_1_EKDL220003835-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_M_3_EKDL220003836-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_1_EKDL220000948-1a_H22J7DSX3_L2.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_2_EKDL220003833-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_3_EKDL220003834-1a_HJ5T2DSX3_L1.bw
m_5fC_bg <- rescaleSignalMatrices(m_5fC,bg_nf_full)
#saveRDS(m_5fC,"m_5fC.rds")
saveRDS(m_5fC_bg,"../object_resources/figure_2_sup_meta/m_5fC_bg.rds")
#m_5fC_bg <- readRDS("m_5fC_bg.rds")
plotting heatmaps
names(m_5fC_bg) <- c("wt 1","wt 2","Tdg +|- 1","Tdg +|- 2","Tdg +|- 3")
p3 <- plotEnrichedHeatmaps(m_5fC_bg,raster_resize_mat = TRUE, row_title="5fC sites",scale_title = "backgr. \nnorm. \ndensity",colors = rocket(100),column_title_gp=gpar(fontface = "italic"),row_title_gp = gpar(fontsize = 11),top_annotation = FALSE, use_raster = TRUE)
p3
saveRDS(p3,"../object_resources/figure_2_sup_meta/p3.rds")
#p3 <- readRDS("p3.rds")
generation of signal matrices for 5fC TSS
m_TSS_5fC <- signal2Matrix(total_bw_list,w = 10L, regions = TSS_2KB_5fC)
## Reading ../Tdg_wt//tracks//Kg_tdg_M_1_EKDL220003835-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_M_3_EKDL220003836-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_1_EKDL220000948-1a_H22J7DSX3_L2.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_2_EKDL220003833-1a_HJ5T2DSX3_L1.bw
## Reading ../Tdg_wt//tracks//Kg_tdg_ko_3_EKDL220003834-1a_HJ5T2DSX3_L1.bw
m_TSS_5fC_bg <- rescaleSignalMatrices(m_TSS_5fC,bg_nf_full)
saveRDS(m_TSS_5fC,"../object_resources/figure_2_sup_meta/m_TSS_5fC.rds")
#saveRDS(m_TSS_5fC_bg,"../object_resources/figure_2_sup_meta/m_TSS_5fC_bg.rds")
#m_TSS_5fC_bg <- readRDS("../object_resources/figure_2_sup_meta/m_TSS_5fC_bg.rds")
names(m_TSS_5fC_bg) <- c("wt 1","wt 2","Tdg +|- 1","Tdg +|- 2","Tdg +|- 3")
plotting heatmaps
p4 <- plotEnrichedHeatmaps(m_TSS_5fC_bg,raster_resize_mat = TRUE, row_title="TSS with 5fC",scale_title = "backgr. \nnorm.\ndensity", axis_name = c("-2kb","TSS","+2kb"),colors = rocket(100),column_title_gp=gpar(fontface = "italic"),row_title_gp = gpar(fontsize = 11),top_annotation = FALSE,use_raster = TRUE)
print(p4)
saveRDS(p4,"../object_resources/figure_2_sup_meta/p4.rds")
#p4 <- readRDS("../object_resources/figure_2_sup_meta/p4.rds")
pp <- plot_grid(p1,grid::grid.grabExpr(draw(p2)),grid::grid.grabExpr(draw(p3)),grid::grid.grabExpr(draw(p4)),nrow = 4,labels =c("A","B","C","D"),ncol = 1)
## Warning: Removed 337053 rows containing non-finite values (`stat_density()`).
pp
pdf("figure_2_sup_meta.pdf", width=9, height=8)
pp
dev.off()
## png
## 2
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] cowplot_1.1.1 viridis_0.6.2 viridisLite_0.4.1
## [4] RColorBrewer_1.1-3 AnnotationHub_2.22.1 BiocFileCache_1.14.0
## [7] dbplyr_2.1.1 TFBSTools_1.28.0 universalmotif_1.8.5
## [10] MotifDb_1.32.0 Biostrings_2.58.0 XVector_0.30.0
## [13] memes_0.99.11 motifmatchr_1.12.0 chromVAR_1.12.0
## [16] ggplot2_3.4.2 rtracklayer_1.50.0 epiwraps_0.99.62
## [19] EnrichedHeatmap_1.29.2 ComplexHeatmap_2.15.1 GenomicRanges_1.42.0
## [22] GenomeInfoDb_1.26.7 IRanges_2.24.1 S4Vectors_0.28.1
## [25] BiocGenerics_0.36.1
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.3 R.utils_2.11.0
## [3] tidyselect_1.2.0 poweRlaw_0.70.6
## [5] RSQLite_2.2.11 AnnotationDbi_1.52.0
## [7] htmlwidgets_1.5.4 BiocParallel_1.24.1
## [9] munsell_0.5.0 codetools_0.2-19
## [11] DT_0.22 miniUI_0.1.1.1
## [13] withr_2.5.0 colorspace_2.1-0
## [15] Biobase_2.50.0 highr_0.10
## [17] knitr_1.42 rstudioapi_0.13
## [19] labeling_0.4.2 Rdpack_2.3
## [21] MatrixGenerics_1.2.1 GenomeInfoDbData_1.2.4
## [23] farver_2.1.1 bit64_4.0.5
## [25] vctrs_0.6.1 generics_0.1.3
## [27] xfun_0.38 biovizBase_1.38.0
## [29] ggseqlogo_0.1 R6_2.5.1
## [31] doParallel_1.0.17 splitstackshape_1.4.8
## [33] clue_0.3-60 locfit_1.5-9.4
## [35] AnnotationFilter_1.14.0 bitops_1.0-7
## [37] cachem_1.0.7 DelayedArray_0.16.3
## [39] assertthat_0.2.1 promises_1.2.0.1
## [41] scales_1.2.1 nnet_7.3-17
## [43] gtable_0.3.3 Cairo_1.6-0
## [45] ensembldb_2.14.1 seqLogo_1.56.0
## [47] rlang_1.1.0 GlobalOptions_0.1.2
## [49] splines_4.0.3 lazyeval_0.2.2
## [51] dichromat_2.0-0.1 checkmate_2.1.0
## [53] BiocManager_1.30.20 yaml_2.3.7
## [55] reshape2_1.4.4 GenomicFeatures_1.42.3
## [57] backports_1.4.1 httpuv_1.6.9
## [59] Hmisc_4.6-0 tools_4.0.3
## [61] ellipsis_0.3.2 jquerylib_0.1.4
## [63] Rcpp_1.0.10 plyr_1.8.8
## [65] base64enc_0.1-3 progress_1.2.2
## [67] zlibbioc_1.36.0 purrr_1.0.1
## [69] RCurl_1.98-1.6 prettyunits_1.1.1
## [71] rpart_4.1.16 openssl_2.0.0
## [73] GetoptLong_1.0.5 GenomicFiles_1.26.0
## [75] SummarizedExperiment_1.20.0 cluster_2.1.4
## [77] magrittr_2.0.3 magick_2.7.3
## [79] data.table_1.14.8 circlize_0.4.15
## [81] ProtGenerics_1.22.0 matrixStats_0.63.0
## [83] hms_1.1.1 mime_0.12
## [85] evaluate_0.20 xtable_1.8-4
## [87] XML_3.99-0.9 jpeg_0.1-10
## [89] gridExtra_2.3 shape_1.4.6
## [91] compiler_4.0.3 biomaRt_2.46.3
## [93] tibble_3.2.1 crayon_1.5.2
## [95] R.oo_1.24.0 htmltools_0.5.5
## [97] later_1.3.0 tzdb_0.3.0
## [99] Formula_1.2-5 tidyr_1.2.0
## [101] DBI_1.1.3 MASS_7.3-56
## [103] rappdirs_0.3.3 Matrix_1.5-3
## [105] readr_2.1.2 cli_3.6.1
## [107] rbibutils_2.2.8 R.methodsS3_1.8.1
## [109] Gviz_1.34.1 pkgconfig_2.0.3
## [111] GenomicAlignments_1.26.0 TFMPvalue_0.0.8
## [113] foreign_0.8-84 plotly_4.10.0
## [115] xml2_1.3.3 foreach_1.5.2
## [117] annotate_1.68.0 bslib_0.3.1
## [119] DirichletMultinomial_1.32.0 stringr_1.5.0
## [121] VariantAnnotation_1.36.0 digest_0.6.31
## [123] pracma_2.3.8 CNEr_1.26.0
## [125] rmarkdown_2.13 htmlTable_2.4.0
## [127] edgeR_3.32.1 curl_5.0.0
## [129] shiny_1.7.1 Rsamtools_2.6.0
## [131] gtools_3.9.4 rjson_0.2.21
## [133] lifecycle_1.0.3 jsonlite_1.8.4
## [135] askpass_1.1 limma_3.46.0
## [137] BSgenome_1.58.0 fansi_1.0.4
## [139] pillar_1.9.0 lattice_0.21-8
## [141] KEGGREST_1.30.1 fastmap_1.1.1
## [143] httr_1.4.2 survival_3.3-1
## [145] GO.db_3.12.1 interactiveDisplayBase_1.28.0
## [147] glue_1.6.2 UpSetR_1.4.0
## [149] png_0.1-7 iterators_1.0.14
## [151] BiocVersion_3.12.0 bit_4.0.5
## [153] stringi_1.7.12 sass_0.4.1
## [155] blob_1.2.2 latticeExtra_0.6-29
## [157] caTools_1.18.2 memoise_2.0.1
## [159] dplyr_1.1.1